# Build the tokenizer from the LLM training config, then export its vocab (symbol table) to a file.
import sys
sys.path.insert(0,'../../../')
from wenet.utils.init_tokenizer import init_tokenizer
from gxl_ai_utils.utils import utils_file
# Build the tokenizer described by the training config and dump its symbol
# table: first to stdout for inspection, then to a local token file.
# NOTE: both paths are relative, so they resolve against the current
# working directory at the time the script is run.
yaml_path = "../conf/train_ASLP_ASRLLM.yaml"
vocab_out_path = './phi_tokens.txt'

# Parse the YAML config into a dict and hand it to the tokenizer factory.
train_config = utils_file.load_dict_from_yaml(yaml_path)
symbol_table = init_tokenizer(train_config).symbol_table

# Show the table, then persist it via the project's scp writer.
utils_file.print_dict(symbol_table)
utils_file.write_dict_to_scp(symbol_table, vocab_out_path)


